/**Nitya Pandalai Nayar*/
/**Updated Sept 06, 2016 */

/**This File Creates the Firm-HS-Country-Month Dataset (split by AL/RP) for ER Project, exploring various quantity definitions */

libname fromlftd  "";
libname outloc "";

/** *******************************************************************************/
/** 			SUMMARY OF THIS FILE					 */
/**										*/
/** FILES THAT MUST BE RUN FIRST -- NONE 				       */
/** SECTION 1: Imports 							      */
/** 	Part 1. 1993-2000 Years						     */
/**		Step 1 Read in and Restrict variables 			    */
/** 		Step 2 Create Extracts 					   */
/**		Step 3 Collapse Down to EIN Alpha HS Country 		  */
/**		       Month QTY1_Con QTY1_IM for NON-RELATED TRANSACTIONS */
/**		Step 4 Collapse Down to EIN Alpha HS Country 		*/
/**		       Month QTY1_Con QTY1_IM for RELATED TRANSACTIONS  */
/** 		Step 5 Now Append Everything Together 		      */
/**		 Output Data in Stata Format 			     */
/**     Part 2: 2001-2011 years (same steps as above)               */
/**     Part 3: Same as above but with SWT instead		   */	
/** OUTPUT alt_mon_imp_&year.dta	   		            */
/** ********************************************************/

/** ****************************************************** */
/** SECTION A: IMPORTS -- value files					  */
/** ***************************************************  */



/** *******************************************************************************/
/** Step 1. Concord NAICS Codes for Years 2001-2009				 */
/** *****************************************************************************/

/**MACRO to loop through years */
%macro byyear(year);


/** Step 4.1 Read in Concordance Data for LFTTD */

	proc import out = hs_naics_import
		FILE="hs_sic_naics_imports_89_09.csv"
		dbms= CSV replace;
	run;

	data hs_naics_import;
		set hs_naics_import (keep = commodity naics year);
		if year + 1900 = &year;
		hst = trim(left(commodity));
		hs = '0000000000';
		if length(hst) lt xx then substr(hs,11-length(hst))=hst;
		else hs = hst;
	run;


	proc sort data=hs_naics_import (drop = commodity hst);
		by hs;
	run;


/** Step 4.2: Bring in LFTTD */

data impyr;
	set fromlftd.imp_comb&year. (keep = HS ORIG_MON COUNTRY QTY1 qty2 RELATED VALUE firmid 
	mot qty1_con  qty1_imp qty2_con qty2_imp);
	/*remove if firmid and ein2 are missing */
	/* IF ein="" AND firmid="" THEN delete; */

/* We don't want to undercount the value of trade, so keeping all transactions but maintaining the flags for problematic transactions*/
	/*remove if qty is imputed */
	/*IF qty_1_im="Q" OR qty_1_im="S" OR qty_1_im="V" OR qty_1_im="W" then delete;*/
	/*im=Q "qty1 created from qty2 and edit master quantity ratio factor" */
	/*im=S "qt1 created by multiplying reported qty1 with edit master scale factor" */
	/*im=V "qty1 created by dividing value with edit master price factor" */
	/*im=W "qty1 created by dividing swt by edit master vessel or air shipping weight" */

	/*remove if qty is converted */
	/*IF qty_1_cv="1" OR qty_1_cv="2" OR qty_1_cv="3" then delete;*/
	/*cv=1 "conversion using edit master unit of quantity" */
	/*cv=2 "changed to zero because quantity non-convertible with edit master unit of quantity" */
	/*cv=3 "qty1 and qty2 were switched based on edit master unit of quantity listed for each" */

	/*remove if qty is missing or zero */
	/*IF qty_1=0 OR qty_1=. then delete;*/
	/* if firmid="" then delete; */
	month2 = 00;
	month2=ORIG_MON;
	
run;

data impyr;
	set impyr;
	quarter = 1;
	if month2=4 OR month2=5 OR month2=6 THEN quarter=2;
	if month2=7 OR month2=8 OR month2=9 THEN quarter=3;
	if month2=10 OR month2=11 OR month2=12 THEN quarter=4;
	qty1_zero=0;
	if qty1=0 OR qty1=. THEN qty1_zero=1;
run;


/** Step 4.3 Merge in NAICS Information -- Do for each year to remove any that don't match */

	proc sort data=impyr;
		by hs;
	run;

	data impyrnaics1;
		merge impyr (in=data1) hs_naics_import (in=data2);
		by hs;
		if data1 and not data2 then m_var=1;
		if not data1 and data2 then m_var=2;
		if data1 and data2 then m_var=3;
	run;

	proc freq data=impyrnaics1;
		tables m_var;
	run;

	data impyrnaics;
		set impyrnaics1;
		if m_var=2 then delete;
		if m_var=1 then delete;
	run;

/** Step 4.4 Create Extracts */
/* Related Extract */
data imprel;
	set impyrnaics (drop=ORIG_MON);
	if related="Y" OR related="R";
	/*unitvalrel = value/qty_1;*/
	/*unitvalswt = value/swt;*/
	year = &year;
	relvalue = value;
	rename month2=month;
run;

/* Nonrelated Extract */
data impnonrel;
	set impyrnaics (drop=ORIG_MON);
	if related~="N" then delete;
	/*unitvalnonrel = value/qty1;*/
	year = &year;
	rename month2=month;
run;


/**Step 4.5 Collapse Down to EIN Alpha HS Country Month qty1_im qty1_con qty1_zero for NON-RELATED TRANSACTIONS*/
proc sort data=impnonrel;
	by firmid country month hs naics qty1_imp qty1_con qty1_zero;
run;

proc means data=impnonrel NOPRINT;
	by firmid country month hs naics qty1_imp qty1_con qty1_zero;
	var value qty1;
	output out = monimpnonrel sum(value qty1) = nonrelvalue nonrelqty;
run;

data monimpnonrel;
	set monimpnonrel;
	rename _freq_=numtransnonrel;
	year = &year;
run;

/**Step 4.6 Collapse Down to EIN Alpha HS Country Month qty1_im qty1_con qty1_zero for RELATED TRANSACTIONS*/
proc sort data=imprel;
	by firmid country month hs naics qty1_imp qty1_con qty1_zero;
run;

proc means data=imprel NOPRINT;
	by firmid country month hs naics qty1_imp qty1_con qty1_zero;
	var value qty1;
	output out = monimprel sum(value qty1) = relvalue relqty;
run;

data monimprel;
	set monimprel;
	rename _freq_=numtransrel;
	year = &year;
run;

/**Step 4.7 Now Append Everything Together */
data monimp;
	set monimprel monimpnonrel;
	
run;

proc export data = monimp
	outfile = "imp_mon_&year"
	dbms = stata replace;
run;

%mend;


%byyear(2008);
%byyear(2009);



/** ***********************************************************/
/** STEP 5. 2010 - 2011 YEARS			     	     */
/** *********************************************************/

/** Do Year = 2010 */
%let year = 2010;

/** Step 5.1 Read in Concordance Data for LFTTD */

proc import out = hs_naics_import
	FILE="CONCORD12_2010_IMPORT.csv"
	dbms= CSV replace;
run;
data hs_naics_import;
	set hs_naics_import (keep=commodity naics);
	hst = trim(left(commodity));
	hs = '0000000000';
	if length(hst) lt xx then substr(hs,11-length(hst))=hst;
	else hs = hst;
run;

proc sort data=hs_naics_import (drop = commodity hst);
	by hs;
run;

/** Step 5.2: Bring in LFTTD */
data impyr;
	set fromlftd.imp_comb&year. (keep = HS ORIG_MON COUNTRY QTY1 qty2 RELATED VALUE firmid 
	mot qty1_con  qty1_imp qty2_con qty2_imp);
	/*remove if firmid and ein2 are missing */
	/* IF ein="" AND firmid="" THEN delete; */

/* We don't want to undercount the value of trade, so keeping all transactions but maintaining the flags for problematic transactions*/
	/*remove if qty is imputed */
	/*IF qty_1_im="Q" OR qty_1_im="S" OR qty_1_im="V" OR qty_1_im="W" then delete;*/
	/*im=Q "qty1 created from qty2 and edit master quantity ratio factor" */
	/*im=S "qt1 created by multiplying reported qty1 with edit master scale factor" */
	/*im=V "qty1 created by dividing value with edit master price factor" */
	/*im=W "qty1 created by dividing swt by edit master vessel or air shipping weight" */

	/*remove if qty is converted */
	/*IF qty_1_cv="1" OR qty_1_cv="2" OR qty_1_cv="3" then delete;*/
	/*cv=1 "conversion using edit master unit of quantity" */
	/*cv=2 "changed to zero because quantity non-convertible with edit master unit of quantity" */
	/*cv=3 "qty1 and qty2 were switched based on edit master unit of quantity listed for each" */

	/*remove if qty is missing or zero */
	/*IF qty_1=0 OR qty_1=. then delete;*/
	/* if firmid="" then delete; */
	month2 = 00;
	month2=ORIG_MON;
	
run;

data impyr;
	set impyr;
	quarter = 1;
	if month2=4 OR month2=5 OR month2=6 THEN quarter=2;
	if month2=7 OR month2=8 OR month2=9 THEN quarter=3;
	if month2=10 OR month2=11 OR month2=12 THEN quarter=4;
	qty1_zero=0;
	if qty1=0 OR qty1=. THEN qty1_zero=1;
run;


/** Step 5.3 Merge in NAICS Information -- Do for each year to remove any that don't match */

	proc sort data=impyr;
		by hs;
	run;

	data impyrnaics1;
		merge impyr (in=data1) hs_naics_import (in=data2);
		by hs;
		if data1 and not data2 then m_var=1;
		if not data1 and data2 then m_var=2;
		if data1 and data2 then m_var=3;
	run;

	proc freq data=impyrnaics1;
		tables m_var;
	run;

	data impyrnaics;
		set impyrnaics1;
		if m_var=2 then delete;
		if m_var=1 then delete;
	run;

/** Step 5.4 Create Extracts */
/* Related Extract */
data imprel;
	set impyrnaics (drop=ORIG_MON);
	if related="Y" OR related="R";
	/*unitvalrel = value/qty_1;*/
	/*unitvalswt = value/swt;*/
	year = &year;
	relvalue = value;
	rename month2=month;
run;

/* Nonrelated Extract */
data impnonrel;
	set impyrnaics (drop=ORIG_MON);
	if related~="N" then delete;
	/*unitvalnonrel = value/qty1;*/
	year = &year;
	rename month2=month;
run;


/**Step 5.5 Collapse Down to EIN Alpha HS Country Month qty1_im qty1_con qty1_zero for NON-RELATED TRANSACTIONS*/
proc sort data=impnonrel;
	by firmid country month hs naics qty1_imp qty1_con qty1_zero;
run;

proc means data=impnonrel NOPRINT;
	by firmid country month hs naics qty1_imp qty1_con qty1_zero;
	var value qty1;
	output out = monimpnonrel sum(value qty1) = nonrelvalue nonrelqty;
run;

data monimpnonrel;
	set monimpnonrel;
	rename _freq_=numtransnonrel;
	year = &year;
run;

/**Step 5.6 Collapse Down to EIN Alpha HS Country Month qty1_im qty1_con qty1_zero for RELATED TRANSACTIONS*/
proc sort data=imprel;
	by firmid country month hs naics qty1_imp qty1_con qty1_zero;
run;

proc means data=imprel NOPRINT;
	by firmid country month hs naics qty1_imp qty1_con qty1_zero;
	var value qty1;
	output out = monimprel sum(value qty1) = relvalue relqty;
run;

data monimprel;
	set monimprel;
	rename _freq_=numtransrel;
	year = &year;
run;

/**Step 5.7 Now Append Everything Together */
data monimp;
	set monimprel monimpnonrel;
	
run;

proc export data = monimp
	outfile = "imp_mon_&year"
	dbms = stata replace;
run;



/** Do Year = 2011 */

%let year = 2011;

/** Step 5.1 Read in Concordance Data for LFTTD */

proc import out = hs_naics_import
		FILE="CONCORD01_2011_IMPORT.csv"
		dbms= CSV replace;
run;

data hs_naics_import;
	set hs_naics_import (keep=commodity naics);
	hst = trim(left(commodity));
	hs = '0000000000';
	if length(hst) lt xx then substr(hs,11-length(hst))=hst;
	else hs = hst;
run;
	
proc sort data=hs_naics_import (drop= commodity hst);
	by hs;
run;

/** Step 5.2: Bring in LFTTD */
data impyr;
	set fromlftd.imp_comb&year. (keep = HS ORIG_MON COUNTRY QTY1 qty2 RELATED VALUE firmid 
	mot qty1_con  qty1_imp qty2_con qty2_imp);
	/*remove if firmid and ein2 are missing */
	/* IF ein="" AND firmid="" THEN delete; */

/* We don't want to undercount the value of trade, so keeping all transactions but maintaining the flags for problematic transactions*/
	/*remove if qty is imputed */
	/*IF qty_1_im="Q" OR qty_1_im="S" OR qty_1_im="V" OR qty_1_im="W" then delete;*/
	/*im=Q "qty1 created from qty2 and edit master quantity ratio factor" */
	/*im=S "qt1 created by multiplying reported qty1 with edit master scale factor" */
	/*im=V "qty1 created by dividing value with edit master price factor" */
	/*im=W "qty1 created by dividing swt by edit master vessel or air shipping weight" */

	/*remove if qty is converted */
	/*IF qty_1_cv="1" OR qty_1_cv="2" OR qty_1_cv="3" then delete;*/
	/*cv=1 "conversion using edit master unit of quantity" */
	/*cv=2 "changed to zero because quantity non-convertible with edit master unit of quantity" */
	/*cv=3 "qty1 and qty2 were switched based on edit master unit of quantity listed for each" */

	/*remove if qty is missing or zero */
	/*IF qty_1=0 OR qty_1=. then delete;*/
	/* if firmid="" then delete; */
	month2 = 00;
	month2=ORIG_MON;
	
run;

data impyr;
	set impyr;
	quarter = 1;
	if month2=4 OR month2=5 OR month2=6 THEN quarter=2;
	if month2=7 OR month2=8 OR month2=9 THEN quarter=3;
	if month2=10 OR month2=11 OR month2=12 THEN quarter=4;
	qty1_zero=0;
	if qty1=0 OR qty1=. THEN qty1_zero=1;
run;


/** Step 5.3 Merge in NAICS Information -- Do for each year to remove any that don't match */

	proc sort data=impyr;
		by hs;
	run;

	data impyrnaics1;
		merge impyr (in=data1) hs_naics_import (in=data2);
		by hs;
		if data1 and not data2 then m_var=1;
		if not data1 and data2 then m_var=2;
		if data1 and data2 then m_var=3;
	run;

	proc freq data=impyrnaics1;
		tables m_var;
	run;

	data impyrnaics;
		set impyrnaics1;
		if m_var=2 then delete;
		if m_var=1 then delete;
	run;

/** Step 5.4 Create Extracts */
/* Related Extract */
data imprel;
	set impyrnaics (drop=ORIG_MON);
	if related="Y" OR related="R";
	/*unitvalrel = value/qty_1;*/
	/*unitvalswt = value/swt;*/
	year = &year;
	relvalue = value;
	rename month2=month;
run;

/* Nonrelated Extract */
data impnonrel;
	set impyrnaics (drop=ORIG_MON);
	if related~="N" then delete;
	/*unitvalnonrel = value/qty1;*/
	year = &year;
	rename month2=month;
run;


/**Step 5.5 Collapse Down to EIN Alpha HS Country Month qty1_im qty1_con qty1_zero for NON-RELATED TRANSACTIONS*/
proc sort data=impnonrel;
	by firmid country month hs naics qty1_imp qty1_con qty1_zero;
run;

proc means data=impnonrel NOPRINT;
	by firmid country month hs naics qty1_imp qty1_con qty1_zero;
	var value qty1;
	output out = monimpnonrel sum(value qty1) = nonrelvalue nonrelqty;
run;

data monimpnonrel;
	set monimpnonrel;
	rename _freq_=numtransnonrel;
	year = &year;
run;

/**Step 5.6 Collapse Down to EIN Alpha HS Country Month qty1_im qty1_con qty1_zero for RELATED TRANSACTIONS*/
proc sort data=imprel;
	by firmid country month hs naics qty1_imp qty1_con qty1_zero;
run;

proc means data=imprel NOPRINT;
	by firmid country month hs naics qty1_imp qty1_con qty1_zero;
	var value qty1;
	output out = monimprel sum(value qty1) = relvalue relqty;
run;

data monimprel;
	set monimprel;
	rename _freq_=numtransrel;
	year = &year;
run;

/**Step 5.7 Now Append Everything Together */
data monimp;
	set monimprel monimpnonrel;
	
run;

proc export data = monimp
	outfile = "imp_mon_&year"
	dbms = stata replace;
run;



/************************************************************************
*** 2012-2013 -- use 2011 concordance for lack of better option
*************************************************************************/
/** Do Year = 2012 */

%let year = 2012;

/** Step 5.1 Read in Concordance Data for LFTTD */

proc import out = hs_naics_import
		FILE="CONCORD01_2011_IMPORT.csv"
		dbms= CSV replace;
run;

data hs_naics_import;
	set hs_naics_import (keep=commodity naics);
	hst = trim(left(commodity));
	hs = '0000000000';
	if length(hst) lt xx then substr(hs,11-length(hst))=hst;
	else hs = hst;
run;
	
proc sort data=hs_naics_import (drop= commodity hst);
	by hs;
run;

/** Step 5.2: Bring in LFTTD */
data impyr;
	set fromlftd.imp_comb&year. (keep = HS ORIG_MON COUNTRY QTY1 qty2 RELATED VALUE firmid 
	mot qty1_con  qty1_imp qty2_con qty2_imp);
	/*remove if firmid and ein2 are missing */
	/* IF ein="" AND firmid="" THEN delete; */

/* We don't want to undercount the value of trade, so keeping all transactions but maintaining the flags for problematic transactions*/
	month2 = 00;
	month2=ORIG_MON;
	
run;

data impyr;
	set impyr;
	quarter = 1;
	if month2=4 OR month2=5 OR month2=6 THEN quarter=2;
	if month2=7 OR month2=8 OR month2=9 THEN quarter=3;
	if month2=10 OR month2=11 OR month2=12 THEN quarter=4;
	qty1_zero=0;
	if qty1=0 OR qty1=. THEN qty1_zero=1;
run;


/** Step 5.3 Merge in NAICS Information -- Do for each year to remove any that don't match */

	proc sort data=impyr;
		by hs;
	run;

	data impyrnaics1;
		merge impyr (in=data1) hs_naics_import (in=data2);
		by hs;
		if data1 and not data2 then m_var=1;
		if not data1 and data2 then m_var=2;
		if data1 and data2 then m_var=3;
	run;

	proc freq data=impyrnaics1;
		tables m_var;
	run;

	data impyrnaics;
		set impyrnaics1;
		if m_var=2 then delete;
		if m_var=1 then delete;
	run;

/** Step 5.4 Create Extracts */
/* Related Extract */
data imprel;
	set impyrnaics (drop=ORIG_MON);
	if related="Y" OR related="R";
	/*unitvalrel = value/qty_1;*/
	/*unitvalswt = value/swt;*/
	year = &year;
	relvalue = value;
	rename month2=month;
run;

/* Nonrelated Extract */
data impnonrel;
	set impyrnaics (drop=ORIG_MON);
	if related~="N" then delete;
	/*unitvalnonrel = value/qty1;*/
	year = &year;
	rename month2=month;
run;


/**Step 5.5 Collapse Down to EIN Alpha HS Country Month qty1_im qty1_con qty1_zero for NON-RELATED TRANSACTIONS*/
proc sort data=impnonrel;
	by firmid country month hs naics qty1_imp qty1_con qty1_zero;
run;

proc means data=impnonrel NOPRINT;
	by firmid country month hs naics qty1_imp qty1_con qty1_zero;
	var value qty1;
	output out = monimpnonrel sum(value qty1) = nonrelvalue nonrelqty;
run;

data monimpnonrel;
	set monimpnonrel;
	rename _freq_=numtransnonrel;
	year = &year;
run;

/**Step 5.6 Collapse Down to EIN Alpha HS Country Month qty1_im qty1_con qty1_zero for RELATED TRANSACTIONS*/
proc sort data=imprel;
	by firmid country month hs naics qty1_imp qty1_con qty1_zero;
run;

proc means data=imprel NOPRINT;
	by firmid country month hs naics qty1_imp qty1_con qty1_zero;
	var value qty1;
	output out = monimprel sum(value qty1) = relvalue relqty;
run;

data monimprel;
	set monimprel;
	rename _freq_=numtransrel;
	year = &year;
run;

/**Step 5.7 Now Append Everything Together */
data monimp;
	set monimprel monimpnonrel;
	
run;

proc export data = monimp
	outfile = "imp_mon_&year"
	dbms = stata replace;
run;


